# -*- coding: utf-8 -*-
"""
Created on Sun May 26 02:15:11 2024

@author: BDYGS

GRU-based multi-output time-series forecaster: configuration, data
windowing/normalization helpers, the NSE metric, and the model definition
used by the prediction script at the bottom of this file.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
# import tushare as ts
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from torch.utils.data import TensorDataset
from tqdm import tqdm


class Config():
    """Hyper-parameters shared by the data pipeline and the model."""
    timestep = 60           # input window length (number of past time steps)
    batch_size = 30         # mini-batch size
    feature_size = 8        # features per time step
    hidden_size = 256       # GRU hidden-state width
    output_size = 15        # future steps predicted by each output head
    num_layers = 2          # stacked GRU layers
    epochs = 100            # training epochs
    best_loss = 0           # best loss seen so far (updated during training)
    learning_rate = 0.0003  # AdamW learning rate


config = Config()


def normalization(data, label):
    """Min-max scale `data` and `label` with independent scalers.

    Returns the scaled arrays plus the label scaler so that predictions can
    later be mapped back to original units via `inverse_transform`.
    """
    mm_x = MinMaxScaler()
    mm_y = MinMaxScaler()
    data = mm_x.fit_transform(data)
    label = mm_y.fit_transform(label)
    return data, label, mm_y


def split_windows(data, seq_len, output_size):
    """Slice `data` into overlapping (input, target) window pairs.

    Each input is `seq_len` consecutive rows of all features; the target is
    the following `output_size` rows of columns 2 onward (the label columns).

    BUG FIX: the loop bound was `len(data) - seq_len - 1 - output_size`,
    which silently dropped the last two valid windows.
    """
    x, y = [], []
    for i in range(len(data) - seq_len - output_size + 1):
        x.append(data[i:i + seq_len, :])
        y.append(data[i + seq_len:i + seq_len + output_size, 2:])
    x, y = np.array(x), np.array(y)
    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y


def split_windows_long(data, seq_len, output_size):
    """Windowing with non-overlapping targets: each window starts
    `output_size` rows after the previous one, so consecutive target blocks
    tile the series (used for long-horizon evaluation).

    BUG FIX: the window count was hard-coded as `len(data)//output_size - 4`,
    which is only correct when `seq_len == 4 * output_size` (as with the
    defaults 60/15) and yields ragged or missing windows otherwise.  The
    count is now derived from the actual array bounds; behavior is identical
    for the default configuration.
    """
    print(len(data))
    x, y = [], []
    n_windows = (len(data) - seq_len - output_size) // output_size + 1
    for i in range(n_windows):
        a = i * output_size
        x.append(data[a:a + seq_len, :])
        y.append(data[a + seq_len:a + seq_len + output_size, 2:])
    x, y = np.array(x), np.array(y)
    print('x.shape,y.shape=\n', x.shape, y.shape)
    return x, y


def nash_sutcliffe_efficiency(y_true, y_pred):
    """Nash-Sutcliffe Efficiency: 1 - SSE / variance of the observations.

    Parameters:
        y_true : array-like, observed values
        y_pred : array-like, predicted values

    Returns:
        float; 1.0 is a perfect fit, values <= 0 mean the model is no
        better than predicting the observed mean.
    """
    return 1 - np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2)


class GRU(nn.Module):
    """Stacked GRU with six independent linear heads (fc1..fc6), one per
    predicted node; each head maps the final hidden state to `output_size`
    future steps, producing a (batch, output_size, 6) tensor.
    """

    def __init__(self, feature_size, hidden_size, num_layers, output_size):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        # NOTE(review): dropout=0.8 is unusually aggressive — kept as-is
        # since it is a training-time choice and does not affect the
        # checkpoint state dict.
        self.gru = nn.GRU(feature_size, hidden_size, num_layers,
                          dropout=0.8, batch_first=True)
        # Attribute names fc1..fc6 must stay fixed: saved checkpoints are
        # loaded by parameter name.
        self.fc1 = nn.Linear(self.hidden_size, self.output_size)
        self.fc2 = nn.Linear(self.hidden_size, self.output_size)
        self.fc3 = nn.Linear(self.hidden_size, self.output_size)
        self.fc4 = nn.Linear(self.hidden_size, self.output_size)
        self.fc5 = nn.Linear(self.hidden_size, self.output_size)
        self.fc6 = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden=None):
        """Run the GRU and the six heads.

        Parameters:
            x : (batch, timestep, feature_size) float tensor
            hidden : optional initial hidden state; zeros when None

        Returns:
            (batch, output_size, 6) tensor of per-node predictions.
        """
        batch_size = x.size(0)
        if hidden is None:
            # Modern replacement for the deprecated x.data.new(...).fill_(0)
            h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                              dtype=x.dtype, device=x.device)
        else:
            h_0 = hidden
        output, h_0 = self.gru(x, h_0)
        # Only the last time step feeds the heads, so slice once instead of
        # applying every Linear to the full sequence and then slicing —
        # mathematically identical, less work.
        last = output[:, -1, :]
        preds = [self.fc1(last), self.fc2(last), self.fc3(last),
                 self.fc4(last), self.fc5(last), self.fc6(last)]
        return torch.stack(preds, dim=2)


model = GRU(config.feature_size, config.hidden_size,
            config.num_layers, config.output_size)
print(model)
loss_function = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
model.load_state_dict(torch.load('C://Users//ZMK//Desktop//GRU//GRU_YDH.pth'))
model.eval()

# ---- prediction -----------------------------------------------------------
df_pre = pd.read_csv("C:\\Users\\ZMK\\Desktop\\GRU\\永定河井-pre.csv",
                     parse_dates=["date"], index_col=[0])
print(df_pre.shape)

data_pre = df_pre.iloc[:, 0:8]
# The label column only fits the inverse scaler; it takes no part in the
# forward pass.
label_pre = df_pre.iloc[:, 7]

data_pre = data_pre.values
label_pre = label_pre.values.reshape(-1, 1)

data_pre, label_pre, mm_y_pre = normalization(data_pre, label_pre)
dataX_pre, dataY_pre = split_windows_long(data_pre, config.timestep, config.output_size)

# torch.tensor replaces the deprecated Variable wrapper.
dataX_pre = torch.tensor(np.asarray(dataX_pre), dtype=torch.float32)
dataY_pre = torch.tensor(np.asarray(dataY_pre), dtype=torch.float32)
print(dataY_pre.shape)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    test_pre = model(dataX_pre)
print(test_pre.shape)

with pd.ExcelWriter("C:\\Users\\ZMK\\Desktop\\GRU\\GRU-pre-ydh.xlsx", engine='openpyxl') as writer:
    for i in range(6):
        test_pre_data = test_pre[:, :, i].numpy().reshape(-1, 1)
        y_test_pre = dataY_pre[:, :, i].numpy().reshape(-1, 1)

        # NOTE(review): mm_y_pre was fitted on a single column (column 7);
        # applying it to every head assumes all six nodes share that scale —
        # confirm against how the labels were built.
        test_pre_data_inv = mm_y_pre.inverse_transform(test_pre_data)
        y_test_inv = mm_y_pre.inverse_transform(y_test_pre)

        print('MAE/RMSE/NSE')
        print(mean_absolute_error(y_test_inv, test_pre_data_inv))
        print(np.sqrt(mean_squared_error(y_test_inv, test_pre_data_inv)))
        print(nash_sutcliffe_efficiency(y_test_inv, test_pre_data_inv))

        y_test_inv = pd.DataFrame(y_test_inv, columns=[f'True Node {i+1}'])
        test_pre_data_inv = pd.DataFrame(test_pre_data_inv, columns=[f'pre Node {i+1}'])

        # BUG FIX: the frames were written to swapped sheets (predictions
        # under 'True Node …' and truth under 'pre Node …'); each frame now
        # goes to the sheet matching its own column label.
        y_test_inv.to_excel(writer, sheet_name=f'True Node {i+1}', index=False)
        test_pre_data_inv.to_excel(writer, sheet_name=f'pre Node {i+1}', index=False)